Author

Tony Duan

Published

January 1, 2022

1 duckdb

1.1 connect

Code
library(duckdb)
# to start an in-memory database
con <- dbConnect(duckdb())

1.2 write data into duckdb

Code
dbWriteTable(con, "iris_table", iris)
Code
data <- dbGetQuery(con, "SELECT * FROM iris_table limit 5")
Code
data
  Sepal.Length Sepal.Width Petal.Length Petal.Width Species
1          5.1         3.5          1.4         0.2  setosa
2          4.9         3.0          1.4         0.2  setosa
3          4.7         3.2          1.3         0.2  setosa
4          4.6         3.1          1.5         0.2  setosa
5          5.0         3.6          1.4         0.2  setosa

2 duckplyr

Code
#install.packages("conflicted")
#install.packages("duckplyr")
library(conflicted)
library(duckplyr)
library(duckdb)
conflict_prefer("filter", "duckplyr")
Code
out <-
  palmerpenguins::penguins %>%
  # CAVEAT: factor columns are not supported yet
  mutate(across(where(is.factor), as.character)) %>%
  as_duckplyr_df() %>%
  mutate(bill_area = bill_length_mm * bill_depth_mm) %>%
  summarize(.by = c(species, sex), mean_bill_area = mean(bill_area)) %>%
  filter(species != "Gentoo")
Code
class(out)
[1] "duckplyr_df" "tbl_df"      "tbl"         "data.frame" 
Code
names(out)
[1] "species"        "sex"            "mean_bill_area"
Code
out %>%
  explain()
┌───────────────────────────┐
│       HASH_GROUP_BY       │
│   ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─   │
│             #0            │
│             #1            │
│          mean(#2)         │
└─────────────┬─────────────┘                             
┌─────────────┴─────────────┐
│         PROJECTION        │
│   ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─   │
│          species          │
│            sex            │
│         bill_area         │
└─────────────┬─────────────┘                             
┌─────────────┴─────────────┐
│         PROJECTION        │
│   ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─   │
│          species          │
│            sex            │
│         bill_area         │
└─────────────┬─────────────┘                             
┌─────────────┴─────────────┐
│           FILTER          │
│   ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─   │
│   (species != 'Gentoo')   │
│   ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─   │
│           EC: 68          │
└─────────────┬─────────────┘                             
┌─────────────┴─────────────┐
│     R_DATAFRAME_SCAN      │
│   ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─   │
│         data.frame        │
│   ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─   │
│          species          │
│       bill_length_mm      │
│       bill_depth_mm       │
│            sex            │
│   ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─   │
│          EC: 344          │
└───────────────────────────┘                             

3 reference

https://www.youtube.com/watch?v=V9GwSPjKMKw

https://github.com/duckdblabs/duckplyr/

https://github.com/duckdb/duckdb-r

https://duckdb.org/docs/api/r.html